## behavioral data:

d <- fread(here("in", "behavior-and-events_stroop_2021-10-20_nice.csv"))
subjs_behav <- as.data.table(table(d$subj, d$session, d$wave))
names(subjs_behav) <- c("subj", "session", "wave", "nrow_eprime")
# subjs_behav$is_missing <- subjs_behav$nrow_eprime == 0
# subjs_behav$is_incomplete <- subjs_behav$nrow_eprime < 216




## fmri data (list of subjs who have):

fnames_subjlist <- list.files(here("in/subjs"), pattern = "^subjlist", full.names = TRUE)
names(fnames_subjlist) <- gsub("^.*stroop-rsa-pc/in/subjs/subjlist_fmri_(.*)\\.txt", "\\1", fnames_subjlist)
subjs_fmri <- rbindlist(lapply(fnames_subjlist, fread), idcol = "dmcc_session")
names(subjs_fmri)[2] <- "subj"
subjs_fmri <- separate(subjs_fmri, dmcc_session, c("dmcc", "session"))
subjs_fmri$wave <- ifelse(subjs_fmri$dmcc == "dmcc2", "wave1", ifelse(subjs_fmri$dmcc == "dmcc3", "wave2", "wave3"))
subjs_fmri$dmcc <- NULL
subjs_fmri <- as.data.table(subjs_fmri)
subjs_fmri$needs_rerun <- grepl("_", subjs_fmri$subj)
subjs_fmri$subj <- gsub("(.*[0-9])_(.*)", "\\1", subjs_fmri$subj)


## plot fmri and behav data

mat_fmri <- table(subjs_fmri$subj, paste0(subjs_fmri$wave, "_", subjs_fmri$session)) %>%
  as.data.frame %>%
  rename(subj = Var1, wave_session = Var2) %>%
  mutate(id = "fmri")
mat_behav <- subjs_behav %>% 
  mutate(wave_session = paste0(wave, "_", session), Freq = nrow_eprime %in% c(216, 240), id = "behav") %>%
  select(subj, wave_session, Freq, id)
mat <- full_join(mat_fmri, mat_behav) 
## Joining, by = c("subj", "wave_session", "Freq", "id")
mat %>%
  mutate(wave_session = gsub("wave([1-3])_(...).*", "w\\1_\\2", wave_session)) %>%
  ggplot(aes(wave_session, subj, fill = Freq)) +
  geom_raster() +
  facet_grid(cols = vars(id)) +
  scale_fill_gradient(low = "white", high = "black") +
  theme(legend.position = "none") +
  labs(x = "wave_session", y = "subj", title = "missing (white)")

## MB8 subjs

subjs_mb8 <- c("162026", "179245", "182840", "198855", "205220", "214524", "568963", "623844")


## twins

# from /data/nil-external/ccp/JosetAEtzel/DMCC_files/getPairs.Rsubjs_twins

subjs_mz <- data.table(
  a = c(
    "DMCC1624043", "DMCC1971064", "233326", "209228", "DMCC4260551", "DMCC6755891", "DMCC6904377", "DMCC9850294",
    "162026", "DMCC8078683", "DMCC7297690", "DMCC6960387", "DMCC5065053", "DMCC5820265", "DMCC3206338"
    ),
  b = c(
    "DMCC3204738", "DMCC3509558", "352738", "765864", "DMCC7921988", "DMCC8050964", "DMCC9953810", "DMCC2560452",
    "568963", "DMCC6484785", "DMCC1596165", "DMCC8260571", "DMCC4368773", "DMCC3091953", "DMCC4854984"
    )
)
subjs_mz_mbsr <- data.table(
  a = c(
    "182436", "115825", "250427", "171330", "DMCC2834766", "DMCC2803654", "DMCC4191255", "562345",
    "DMCC5195268", "DMCC6661074", "DMCC5775387", "DMCC6371570", "DMCC2442951", "DMCC1328342",
    "DMCC3963378", "DMCC3876181", "DMCC5244053"
    ),
  b = c(
    "178647", "178243", "877168", "393550", "DMCC6671683", "DMCC9478705", "DMCC6418065", "130518",
    "DMCC8033964", "DMCC6705371", "DMCC6721369", "DMCC9441378", "DMCC6627478", "DMCC5009144",
    "DMCC2609759", "DMCC0472647", "DMCC8760894"
    )
  
)
subjs_dz <- data.table(
  a = c("198855", "DMCC8214059"),
  b = c("623844", "DMCC3062542")
)
subjs_dz_mbsr <- data.table(
  a = c("130114"),
  b = c("155938")
)
subjs_twins <- rbindlist(list(mz = subjs_mz, mz = subjs_mz_mbsr, dz = subjs_dz, dz = subjs_dz_mbsr), idcol = "twin")
subjs_twins[, twinpair := 1:.N]
subjs_twins <- melt(subjs_twins, id.vars = c("twin", "twinpair"), value.name = "subj")[, -"variable"]



## poor quality

# from: https://github.com/ccplabwustl/R01/blob/master/Jo/datasetQC/blocksTasksToDrop.txt

subjs_jolist <- data.table(
  rbind(
    ## DMCC2 for behavior ("sleeping"):
    c("behavior", "wave1", "233326", "proactive"),
    c("behavior", "wave1", "DMCC6371570", "reactive"),
    # DMCC2 for movement (too much censoring):
    c("movement", "wave1", "873968", "proactive"),
    c("movement", "wave1", "DMCC1971064", "proactive"),
    # DMCC3 for behavior ("sleeping"):
    c("behavior", "wave2", "161832", "baseline")
  )
)
names(subjs_jolist) <- c("issue", "wave", "subj", "session")



## bind

#subjs <- full_join(left_join(full_join(subjs_behav, subjs_fmri), subjs_twins), subjs_jolist)

subjs_with_fmri <- inner_join(subjs_behav, subjs_fmri, by = c("session", "subj", "wave"))
subjs_with_fmri <- left_join(subjs_with_fmri, subjs_twins, by = "subj")  ## add twin info
subjs <- full_join(subjs_with_fmri, subjs_jolist, by = c("session", "subj", "wave"))  ## add jolist

nrow(subjs) == nrow(subjs_with_fmri) ## TRUE if all subj*session*waves in jolist are in subjs
## [1] TRUE

initial exclusions

missing and incomplete fmri runs

subjs_samp <- subjs[nrow_eprime %in% c(216, 240)]

subjs[nrow_eprime == 0]
##            subj   session  wave nrow_eprime needs_rerun twin twinpair issue
##  1:      171330  baseline wave1           0       FALSE   mz       19  <NA>
##  2:      843151  baseline wave1           0       FALSE <NA>       NA  <NA>
##  3: DMCC8033964  baseline wave1           0       FALSE   mz       24  <NA>
##  4:      182840 proactive wave1           0       FALSE <NA>       NA  <NA>
##  5:      250427 proactive wave1           0       FALSE   mz       18  <NA>
##  6: DMCC1624043 proactive wave1           0       FALSE   mz        1  <NA>
##  7:      250427  reactive wave1           0       FALSE   mz       18  <NA>
##  8:      352738  reactive wave1           0       FALSE   mz        3  <NA>
##  9: DMCC3204738  reactive wave1           0       FALSE   mz        1  <NA>
## 10: DMCC7297690  reactive wave1           0       FALSE   mz       11  <NA>
## 11:      130114  baseline wave2           0       FALSE   dz       35  <NA>
## 12: DMCC6418065  baseline wave2           0       FALSE   mz       22  <NA>
## 13: DMCC6671683  baseline wave2           0       FALSE   mz       20  <NA>
## 14:      130114 proactive wave2           0       FALSE   dz       35  <NA>
## 15: DMCC6418065 proactive wave2           0       FALSE   mz       22  <NA>
## 16: DMCC6671683 proactive wave2           0       FALSE   mz       20  <NA>
## 17:      130114  reactive wave2           0       FALSE   dz       35  <NA>
## 18: DMCC6671683  reactive wave2           0       FALSE   mz       20  <NA>
subjs[nrow_eprime > 0 & nrow_eprime < 216]
##      subj  session  wave nrow_eprime needs_rerun twin twinpair issue
## 1: 178243 baseline wave1         189       FALSE   mz       17  <NA>
  • Several subjs are marked as having no eprime rows. these subjs may be those without audio recording. exclude for now.
  • One subj has a partially complete set of runs in wave 1 baseline. Exclude.

all MB8 subjects

kable(subjs_samp[subj %in% subjs_mb8])
subj session wave nrow_eprime needs_rerun twin twinpair issue
162026 baseline wave1 216 FALSE mz 9 NA
179245 baseline wave1 216 FALSE NA NA NA
182840 baseline wave1 216 FALSE NA NA NA
205220 baseline wave1 216 FALSE NA NA NA
568963 baseline wave1 216 FALSE mz 9 NA
623844 baseline wave1 216 FALSE dz 33 NA
162026 proactive wave1 216 FALSE mz 9 NA
179245 proactive wave1 216 FALSE NA NA NA
205220 proactive wave1 216 FALSE NA NA NA
568963 proactive wave1 216 FALSE mz 9 NA
162026 reactive wave1 240 FALSE mz 9 NA
179245 reactive wave1 240 FALSE NA NA NA
182840 reactive wave1 240 FALSE NA NA NA
205220 reactive wave1 240 FALSE NA NA NA
568963 reactive wave1 240 FALSE mz 9 NA
623844 reactive wave1 240 FALSE dz 33 NA
568963 baseline wave2 216 FALSE mz 9 NA
568963 proactive wave2 216 FALSE mz 9 NA
568963 reactive wave2 240 FALSE mz 9 NA
subjs_samp <- subjs_samp[!subj %in% subjs_mb8]

568963 was MB8 in wave 1, but MB4 in wave 2. Exclude this subj’s wave2 data for now (but could include later).

quality-based exclusions

movement

kable(subjs_samp[issue == "movement"])
subj session wave nrow_eprime needs_rerun twin twinpair issue
873968 proactive wave1 216 FALSE NA NA movement
DMCC1971064 proactive wave1 216 FALSE mz 2 movement
subjs_samp <- subjs_samp[is.na(issue) | issue != "movement"]

Two wave\(\cdot\)session\(\cdot\)subjs have excessive movements, as defined by Jo’s criteria (more than 20% of frames censored, i.e., FD > 0.9). Excluded.

RT

d_rt <- d %>% 
  right_join(subjs_samp) %>%
  filter(rt > 0, acc == 1)
## Joining, by = c("wave", "session", "subj")
d_rt %>%
  
  ggplot(aes(rt, subj, fill = wave, color = wave)) +
  geom_density_ridges(alpha = 0.2) +
  facet_grid(cols = vars(session)) +
  
  scale_fill_viridis_d() +
  scale_color_viridis_d()
## Picking joint bandwidth of 43.5
## Picking joint bandwidth of 42.4
## Picking joint bandwidth of 42.6

l <- split(d_rt, droplevels(interaction(d_rt$subj, d_rt$wave, d_rt$session)))
d_rt$rt_resid <- unlist(lapply(l, function(x) resid(lm(rt ~ item, x))))

d_rt %>% 
  
  # filter(subj %in% "849971") %>%
  
  ggplot(aes(sample = rt_resid)) +
  geom_qq(size = 0.75, shape = 16) +
  geom_qq_line() +
  facet_grid(rows = vars(subj), cols = vars(wave, session))

Not much looks weird from this angle.

Error

d_er <- d %>% right_join(subjs_samp)
## Joining, by = c("wave", "session", "subj")
sum_er <- d_er %>%
  
  group_by(subj, session, wave) %>%
  mutate(total = n()) %>%
  group_by(subj, session, wave, acc_final) %>%
  summarize(
    total = unique(total),
    percent = sum(n()) / total*100
    )
## `summarise()` has grouped output by 'subj', 'session', 'wave'. You can override using the `.groups` argument.
sum_er %>%
  
  filter(acc_final != "1") %>%
  
  ggplot(aes(percent, subj, color = acc_final)) +
  geom_point(size = 2) +
  geom_vline(xintercept = 10, color = "grey60") +
  facet_grid(cols = vars(wave, session)) +
  
  scale_color_viridis_d() +
  
  theme(legend.position = c(0.8, 0.1))

Line shows 10% threshold for “no response” used in Freund et al. (2021) JNeurosci.

bad <- sum_er %>% filter(acc_final == "no.response" & percent > 10)

bad
## # A tibble: 8 x 6
## # Groups:   subj, session, wave [8]
##   subj        session   wave  acc_final   total percent
##   <chr>       <chr>     <chr> <chr>       <int>   <dbl>
## 1 160830      reactive  wave1 no.response   240    15.4
## 2 161832      baseline  wave2 no.response   216    37.0
## 3 161832      reactive  wave2 no.response   240    27.9
## 4 197449      baseline  wave1 no.response   216    17.1
## 5 197449      baseline  wave3 no.response   216    13.4
## 6 233326      proactive wave1 no.response   216    14.4
## 7 DMCC5009144 baseline  wave2 no.response   216    13.4
## 8 DMCC6371570 reactive  wave1 no.response   240    32.5
sum(subjs_samp$issue == "behavior", na.rm = TRUE)
## [1] 3
subjs_samp <- subjs_samp %>%
  filter(!paste0(subj, session, wave) %in% paste0(bad$subj, bad$session, bad$wave)) %>%
  as.data.table

With that threshold, 3 wave\(\cdot\)session\(\cdot\)subjs don’t meet criteria. Three of those subjects were also ID’d by Jo’s (less strict) “sleeping” criteria. (Her criteria didn’t ID any others not picked up by the 10% threshold.)

session\(\cdot\)wave splits

mat_good_data <- 
  table(subjs_samp$subj, paste0(subjs_samp$wave, "_", subjs_samp$session)) %>%
  as.data.frame %>%
  rename(subj = Var1, wave_session = Var2)
x <- as.matrix(table(subjs_samp$subj, paste0(subjs_samp$wave, "_", subjs_samp$session)))
d <- dist(x, method = "euclidean") # distance matrix
fit <- hclust(d, method="ward.D")
# plot(fit)
groups <- cutree(fit, k = 5) # cut tree into 5 clusters
mat_good_data$subj <- factor(mat_good_data$subj, levels = names(sort(groups)))


mat_good_data %>%
  mutate(wave_session = gsub("wave([1-3])_(...).*", "w\\1_\\2", wave_session)) %>%
  ggplot(aes(wave_session, subj, fill = Freq)) +
  geom_raster() +
  scale_fill_gradient(low = "white", high = "black") +
  theme(legend.position = "none") +
  labs(x = "wave_session", y = "subj", title = "missing (white)")

baseline–proactive: BP

B1P1

twins_in_set <- function(x) x[duplicated(x)]

subjs_bp <- subjs_samp[wave == "wave1" & session %in% c("baseline", "proactive")]
subjs_both_b1p1 <- subjs_bp$subj[duplicated(subjs_bp$subj)]  ## get subjs in both
subjs_bp <- subjs_bp[subj %in% subjs_both_b1p1]

n_b1p1 <- length(unique(subjs_bp$subj))  ## wave 1 n subjs
twins_b1p1 <- twins_in_set(subjs_bp[session == "baseline" & !is.na(twin)]$twinpair)  ## wave 1 n twinpairs
n_twins_b1p1 <- length(twins_b1p1)  ## wave 1 n twinpairs

n_b1p1  ## n total
## [1] 83
n_twins_b1p1  ## n twinpairs
## [1] 19
n_b1p1 - n_twins_b1p1  ## n unrelated
## [1] 64

B2P2, B3P3

subjs_bp2 <- subjs_samp[wave == "wave2" & session %in% c("baseline", "proactive")]
subjs_both_b2p2 <- subjs_bp2$subj[duplicated(subjs_bp2$subj)]  ## get subjs in both
subjs_bp2 <- subjs_bp2[subj %in% subjs_both_b2p2]

n_b2p2 <- length(unique(subjs_bp2$subj))  ## wave 2 n subjs
twins_b2p2 <- twins_in_set(subjs_bp2[session == "baseline" & !is.na(twin)]$twinpair)
n_twins_b2p2 <- length(twins_b2p2)  ## wave 1 n twinpairs

n_b2p2  ## n total
## [1] 40
n_twins_b2p2  ## n twinpairs
## [1] 13
n_b2p2 - n_twins_b2p2  ## n unrelated
## [1] 27
length(intersect(subjs_bp2$subj, subjs_bp$subj))  ## subjs in BP2 that were in BP2
## [1] 34
subjs_bp3 <- subjs_samp[wave == "wave3" & session %in% c("baseline", "proactive")]
subjs_both_b3p3 <- subjs_bp3$subj[duplicated(subjs_bp3$subj)]  ## get subjs in both
subjs_bp3 <- subjs_bp3[subj %in% subjs_both_b3p3]

n_b3p3 <- length(unique(subjs_bp3$subj))  ## wave 3 n subjs
twins_b3p3 <- twins_in_set(subjs_bp3[session == "baseline" & !is.na(twin)]$twinpair)  ## wave 1 n twinpairs
n_twins_b3p3 <- length(twins_b3p3)  ## wave 1 n twinpairs

n_b3p3  ## n total
## [1] 5
n_twins_b3p3  ## n twinpairs
## [1] 1
n_b3p3 - n_twins_b3p3  ## n unrelated
## [1] 4

proactive–baseline: PB

B2P1

subjs_b2p1 <- subjs_samp[(wave == "wave1" & session %in% "proactive") | (wave == "wave2" & session %in% "baseline")]
subjs_both_b2p1 <- subjs_b2p1$subj[duplicated(subjs_b2p1$subj)]  ## get subjs in both
subjs_b2p1 <- subjs_b2p1[subj %in% subjs_both_b2p1]

n_b2p1 <- length(unique(subjs_b2p1$subj))  ## wave 2 n subjs
twins_b2p1 <- twins_in_set(subjs_b2p1[session == "baseline" & !is.na(twin)]$twinpair)
n_twins_b2p1 <- length(twins_b2p1)  ## wave 1 n twinpairs

n_b2p1  ## n total
## [1] 39
n_twins_b2p1  ## n twinpairs
## [1] 12
n_b2p1 - n_twins_b2p1  ## n unrelated
## [1] 27
subjs_b3p1 <- subjs_samp[(wave == "wave1" & session %in% "proactive") | (wave == "wave3" & session %in% "baseline")]
subjs_both_b3p1 <- subjs_b3p1$subj[duplicated(subjs_b3p1$subj)]  ## get subjs in both
subjs_b3p1 <- subjs_b3p1[subj %in% subjs_both_b3p1]

n_b3p1 <- length(unique(subjs_b3p1$subj))  ## wave 2 n subjs
twins_b3p1 <- twins_in_set(subjs_b3p1[session == "baseline" & !is.na(twin)]$twinpair)  ## wave 1 n twinpairs
n_twins_b3p1 <- length(twins_b3p1)  ## wave 1 n twinpairs

n_b3p1  ## n total
## [1] 5
n_twins_b3p1  ## n twinpairs
## [1] 1
n_b3p1 - n_twins_b3p1  ## n unrelated
## [1] 4

reactive session

wave1

subjs_r1 <- subjs_samp[wave == "wave1" & session == "reactive"]

n_r1 <- length(unique(subjs_r1$subj))  ## wave 1 n subjs
twins_r1 <- twins_in_set(subjs_r1[session == "reactive" & !is.na(twin)]$twinpair)  ## wave 1 n twinpairs
n_twins_r1 <- length(twins_r1)  ## wave 1 n twinpairs

n_r1  ## n total
## [1] 91
n_twins_r1  ## n twinpairs
## [1] 23
n_r1 - n_twins_r1  ## n unrelated
## [1] 68

reactive test–retest

subjs_r1r2 <- subjs_samp[session == "reactive" & wave %in% c("wave1", "wave2")]
subjs_both_r1r2 <- subjs_r1r2$subj[duplicated(subjs_r1r2$subj)]  ## get subjs in both
subjs_r1r2 <- subjs_r1r2[subj %in% subjs_both_r1r2]

n_r1r2 <- length(unique(subjs_r1r2$subj))  ## wave 1 n subjs
twins_r1r2 <- twins_in_set(subjs_r1r2[wave == "wave1" & !is.na(twin)]$twinpair)  ## wave 1 n twinpairs
n_twins_r1r2 <- length(twins_r1r2)  ## wave 1 n twinpairs

n_r1r2  ## n total
## [1] 39
n_twins_r1r2  ## n twinpairs
## [1] 12
n_r1r2 - n_twins_r1r2  ## n unrelated
## [1] 27

define groups

out <- subjs_samp[wave %in% c("wave1", "wave2"), -c("nrow_eprime", "twin", "issue")]

sample_cotwins <- function(x) {
  set.seed(0)
  twinpairs <- unique(x$twinpair[duplicated(x$twinpair)])
  x <- x %>% filter(twinpair %in% twinpairs)  ## only twins in sample
  x %>%
    group_by(twinpair) %>%
    sample_n(1) %>%
    pull(subj)
}

mc_mi groups

cotwins_b1p1 <- sample_cotwins(subjs_twins[twinpair %in% twins_b1p1])  ## to exclude
out$is_mc_mi <- FALSE
out[
  wave == "wave1" & session %in% c("baseline", "proactive") & subj %in% setdiff(subjs_both_b1p1, cotwins_b1p1)
  ]$is_mc_mi <- TRUE

out$is_mc_mi_cotwin <- FALSE
out[
  wave == "wave1" & session %in% c("baseline", "proactive") & subj %in% cotwins_b1p1
  ]$is_mc_mi_cotwin <- TRUE


sum(out$is_mc_mi) / 2
## [1] 64
sum(out$is_mc_mi_cotwin) / 2
## [1] 19

mi_mc groups

### order control

cotwins_b2p1 <- sample_cotwins(subjs_twins[twinpair %in% twins_b2p1])  ## to exclude
out$is_mi_mc <- FALSE
out[
    ((wave == "wave2" & session %in% "baseline") | (wave == "wave1" & session %in% "proactive")) &
    subj %in% setdiff(subjs_both_b2p1, cotwins_b2p1)
  ]$is_mi_mc <- TRUE

out$is_mi_mc_cotwin <- FALSE
out[
    ((wave == "wave2" & session %in% "baseline") | (wave == "wave1" & session %in% "proactive")) &
    subj %in% cotwins_b2p1
  ]$is_mi_mc_cotwin <- TRUE


sum(out$is_mi_mc) / 2
## [1] 27
sum(out$is_mi_mc_cotwin) / 2
## [1] 12
## retest 

subjs_bp_retest <- intersect(subjs_both_b2p2, subjs_both_b1p1)
cotwins_bp_retest <- sample_cotwins(subjs_twins[subj %in% subjs_bp_retest])  ## to exclude
out$is_mc_mi_retest <- FALSE
out[
    session %in% c("baseline", "proactive") & subj %in% setdiff(subjs_bp_retest, cotwins_bp_retest)
  ]$is_mc_mi_retest <- TRUE

out$is_mc_mi_retest_cotwin <- FALSE
out[
    session %in% c("baseline", "proactive") & subj %in% cotwins_bp_retest
  ]$is_mc_mi_retest_cotwin <- TRUE


sum(out$is_mc_mi_retest) / 4
## [1] 26
sum(out$is_mc_mi_retest_cotwin) / 4
## [1] 8

ispc groups

subjs_r_retest <- unique(subjs_r1r2$subj)
cotwins_r_retest <- sample_cotwins(subjs_twins[subj %in% subjs_r_retest])  ## to exclude
out$is_ispc_retest <- FALSE
out[
    session %in% c("reactive") & subj %in% setdiff(subjs_r_retest, cotwins_r_retest)
  ]$is_ispc_retest <- TRUE

out$is_ispc_retest_cotwin <- FALSE
out[
    session %in% c("reactive") & subj %in% cotwins_r_retest
  ]$is_ispc_retest_cotwin <- TRUE


sum(out$is_ispc_retest) / 2
## [1] 27
sum(out$is_ispc_retest_cotwin) / 2
## [1] 12

write

fwrite(out, here("out", "subjlist.csv"))